RRPlot and the Colon data set

Libraries

library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
#library(corrplot)
#source("~/GitHub/FRESA.CAD/R/RRPlot.R")
#source("~/GitHub/FRESA.CAD/R/PoissonEventRiskCalibration.R")
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
#pander::panderOptions('table.split.table', 400)
pander::panderOptions('keep.trailing.zeros',TRUE)

The data set

data(cancer)
colon <- subset(colon,etype==1)
colon$etype <- NULL
rownames(colon) <- colon$id
colon$id <- NULL
colon <- colon[complete.cases(colon),]
time <- colon$time
status <- colon$status
data <- colon
data$time <- NULL
data$study <- NULL
table(data$status)

0 1 442 446

dataColon <- as.data.frame(model.matrix(status~.*age,data))
dataColon$`(Intercept)` <- NULL
dataColon$time <- time/365
dataColon$status <- status
colnames(dataColon) <-str_replace_all(colnames(dataColon),":","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\.","_")
colnames(dataColon) <-str_replace_all(colnames(dataColon),"\\+","_")
data <- NULL

trainsamples <- sample(nrow(dataColon),0.7*nrow(dataColon))
dataColonTrain <- dataColon[trainsamples,]
dataColonTest <- dataColon[-trainsamples,]


pander::pander(table(dataColonTrain$status))
0 1
318 303
pander::pander(table(dataColonTest$status))
0 1
124 143

Modeling

ml <- BSWiMS.model(Surv(time,status)~1,data=dataColonTrain,NumberofRepeats = 10)

[++-++-++-+++-++-+++–+-+-+++-+++-+++-]..

sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
  Estimate lower HR upper u.Accuracy
extent 4.12e-01 1.255 1.510 1.816 0.554
node4 3.97e-01 1.228 1.488 1.803 0.591
rxLev_5FU_age -5.32e-03 0.992 0.995 0.997 0.567
age_nodes 3.98e-05 1.000 1.000 1.000 0.589
nodes 2.62e-02 1.012 1.027 1.042 0.599
rxLev_5FU -1.24e-01 0.825 0.884 0.947 0.567
age_extent 1.06e-03 1.000 1.001 1.002 0.538
age_node4 1.10e-03 1.000 1.001 1.002 0.591
adhere 2.54e-02 1.005 1.026 1.047 0.544
surg 1.74e-02 1.003 1.018 1.032 0.549
Table continues below
  r.Accuracy full.Accuracy u.AUC r.AUC full.AUC
extent 0.594 0.624 0.562 0.589 0.627
node4 0.607 0.624 0.585 0.609 0.627
rxLev_5FU_age 0.595 0.624 0.571 0.589 0.627
age_nodes 0.565 0.609 0.586 0.567 0.607
nodes 0.606 0.622 0.596 0.606 0.622
rxLev_5FU 0.587 0.619 0.571 0.585 0.619
age_extent 0.608 0.622 0.539 0.607 0.622
age_node4 0.621 0.615 0.585 0.621 0.614
adhere 0.597 0.612 0.536 0.595 0.610
surg 0.593 0.614 0.544 0.590 0.612
  IDI NRI z.IDI z.NRI Delta.AUC Frequency
extent 0.02541 0.248 4.38 4.43 0.03844 1.0
node4 0.02196 0.334 4.11 4.83 0.01885 1.0
rxLev_5FU_age 0.01880 0.283 3.99 3.79 0.03863 1.0
age_nodes 0.01633 0.268 3.91 3.49 0.04011 1.0
nodes 0.01221 0.222 3.60 2.88 0.01622 1.0
rxLev_5FU 0.01524 0.283 3.54 3.79 0.03390 0.9
age_extent 0.01211 0.166 3.07 2.08 0.01429 0.5
age_node4 0.00711 0.279 2.86 4.09 -0.00656 0.9
adhere 0.00617 0.144 2.51 2.54 0.01454 0.4
surg 0.00602 0.174 2.40 2.45 0.02176 0.3

Cox Model Performance

Here we evaluate the model using the RRPlot() function.

The evaluation of the raw Cox model with RRPlot()

Here we will use the predicted event probability assuming a baseline hazard for events withing 5 years

index <- predict(ml,dataColonTrain)
timeinterval <- 2*mean(subset(dataColonTrain,status==1)$time)

h0 <- sum(dataColonTrain$status & dataColonTrain$time <= timeinterval)
h0 <- h0/sum((dataColonTrain$time > timeinterval) | (dataColonTrain$status==1))

rdata <- cbind(dataColonTrain$status,ppoisGzero(index,h0))

rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Raw Train: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Uncalibrated Performance Report

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.520 0.369 0.241 0.1148 0.499
RR 1.511 1.827 4.524 19.6434 1.522
SEN 0.231 0.587 0.980 1.0000 0.251
SPE 0.896 0.704 0.145 0.0126 0.887
BACC 0.564 0.646 0.562 0.5063 0.569
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
1.01 0.899 1.13 0.862
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.51 1.51 1.49 1.53
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.32 1.32 1.32 1.33
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.667 0.666 0.636 0.697
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.689 0.648 0.73
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.231 0.185 0.283
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.861 0.93
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.521 0.369 0.241 0.115 0.5
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.52 1.29 1.79
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 39.450139 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 519 233 267.9 4.54 39.5
class=1 102 70 35.1 34.64 39.5

Cox Calibration

op <- par(no.readonly = TRUE)


calprob <- CoxRiskCalibration(ml,dataColonTrain,"status","time")

pander::pander(c(h0=calprob$h0,
                 Gain=calprob$hazardGain,
                 DeltaTime=calprob$timeInterval),
               caption="Cox Calibration Parameters")
h0 Gain DeltaTime
0.648 1.5 3.2

The RRplot() of the calibrated model

h0 <- calprob$h0
timeinterval <- calprob$timeInterval;

rdata <- cbind(dataColonTrain$status,calprob$prob)


rrAnalysisTrain <- RRPlot(rdata,atProb=c(0.90),
                     timetoEvent=dataColonTrain$time,
                     title="Calibrated Train: Colon",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Calibrated Train Performance

pander::pander(t(rrAnalysisTrain$keyPoints),caption="Threshold values")
Threshold values
  @:0.9 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.666 0.497 0.338 0.1667 0.500
RR 1.511 1.827 4.524 19.6434 1.739
SEN 0.231 0.587 0.980 1.0000 0.551
SPE 0.896 0.704 0.145 0.0126 0.717
BACC 0.564 0.646 0.562 0.5063 0.634
pander::pander(t(rrAnalysisTrain$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.781 0.695 0.874 8.78e-06
pander::pander(t(rrAnalysisTrain$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
0.987 0.987 0.975 1
pander::pander(t(rrAnalysisTrain$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.02 1.02 1.02 1.02
pander::pander(rrAnalysisTrain$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.667 0.667 0.637 0.696
pander::pander(t(rrAnalysisTrain$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.689 0.648 0.73
pander::pander((rrAnalysisTrain$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.231 0.185 0.283
pander::pander((rrAnalysisTrain$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.899 0.861 0.93
pander::pander(t(rrAnalysisTrain$thr_atP),caption="Probability Thresholds")
Probability Thresholds
90% at_max_BACC at_max_RR atSPE100 at_0.5
0.667 0.497 0.338 0.167 0.5
pander::pander(t(rrAnalysisTrain$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.52 1.29 1.79
pander::pander(rrAnalysisTrain$surdif,caption="Logrank test")
Logrank test Chisq = 39.450139 on 1 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 519 233 267.9 4.54 39.5
class=1 102 70 35.1 34.64 39.5

Evaluating on the test set

The calibrated h0 and timeinterval were estimated on the training set

index <- predict(ml,dataColonTest)
rdata <- cbind(dataColonTest$status,ppoisGzero(index,h0))

rrAnalysisTest <- RRPlot(rdata,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=dataColonTest$time,
                     title="Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

Test Performance

pander::pander(t(rrAnalysisTest$keyPoints),caption="Threshold values")
Threshold values (continued below)
  @:0.667424600803063 @:0.497313110373278 @:0.337641790153328
Thr 0.666 0.498 0.3383
RR 1.795 1.541 1.4555
SEN 0.308 0.615 0.9580
SPE 0.927 0.613 0.0806
BACC 0.618 0.614 0.5193
  @:0.16666168331968 @:0.5 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.15230 0.500 0.512 0.345 0.15230 0.500
RR 1.00000 1.546 1.791 2.320 1.00000 1.546
SEN 1.00000 0.587 0.517 0.937 1.00000 0.587
SPE 0.00806 0.645 0.790 0.218 0.00806 0.645
BACC 0.50403 0.616 0.654 0.577 0.50403 0.616
pander::pander(t(rrAnalysisTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.954 0.804 1.12 0.624
pander::pander(t(rrAnalysisTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.31 1.31 1.27 1.34
pander::pander(t(rrAnalysisTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.1 1.1 1.09 1.1
pander::pander(rrAnalysisTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.649 0.649 0.604 0.694
pander::pander(t(rrAnalysisTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.698 0.636 0.76
pander::pander((rrAnalysisTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.308 0.233 0.39
pander::pander((rrAnalysisTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.935 0.877 0.972
pander::pander(t(rrAnalysisTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds (continued below)
90% at_max_BACC at_max_RR atSPE100 at_0.5 at_max_BACC at_max_RR
0.667 0.497 0.338 0.167 0.5 0.512 0.345
atSPE100 at_0.5
0.152 0.5
pander::pander(t(rrAnalysisTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.82 1.51 2.19
pander::pander(rrAnalysisTest$surdif,caption="Logrank test")
Logrank test Chisq = 39.298651 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 131 55 80.1 7.89156 18.0707
class=1 84 44 43.4 0.00935 0.0134
class=2 52 44 19.5 30.83498 36.1983

Cross-Validation

Here we will cross validate the training set and evaluate also on the testing set. The h0 and the timeinterval are the ones estimated on the calibration process

rcv <- randomCV(theData=dataColonTrain,
                theOutcome = Surv(time,status)~1,
                fittingFunction=BSWiMS.model, 
                trainFraction = 0.75,
                repetitions=50,
                classSamplingType = "Pro",
                testingSet=dataColonTest
         )

.[+++–].[++].[++++-].[+++++].[++++++–].[+++++].[++++-].[++++++++].[++-].[+++–]10 Tested: 851 Avg. Selected: 8.1 Min Tests: 1 Max Tests: 10 Mean Tests: 4.970623 . MAD: 0.4731277

.[++++++].[++++++++].[++++-].[++—].[+-].[++++++].[+++++++].[++++-].[+++++++-].[+++++++++]20 Tested: 886 Avg. Selected: 8.85 Min Tests: 1 Max Tests: 20 Mean Tests: 9.548533 . MAD: 0.473931

.[+++].[++++++].[++++-].[++++-].[++].[+++].[++++-].[+++-].[++++-].[++-+]30 Tested: 888 Avg. Selected: 8.9 Min Tests: 2 Max Tests: 30 Mean Tests: 14.29054 . MAD: 0.4730999

.[++++++].[+++-].[++++-].[++++–].[+++++++].[++++++].[++++–].[++++++].[++++-].[++++-]40 Tested: 888 Avg. Selected: 9 Min Tests: 3 Max Tests: 40 Mean Tests: 19.05405 . MAD: 0.4727452

.[++—].[+++++].[++++++].[++++-].[++++-].[++++++-].[+++++–].[+-+–].[++++++++].[++++–]50 Tested: 888 Avg. Selected: 8.88 Min Tests: 5 Max Tests: 50 Mean Tests: 23.81757 . MAD: 0.4729526

stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]

bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)

rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names

rrAnalysisCVTest <- RRPlot(rdatacv,atThr = rrAnalysisTrain$thr_atP,
                     timetoEvent=times,
                     title="CV Test: Colon Cancer",
                     ysurvlim=c(0.00,1.0),
                     riskTimeInterval=timeinterval)

CV Test Performance

pander::pander(t(rrAnalysisCVTest$keyPoints),caption="Threshold values")
Threshold values (continued below)
  @:0.667424600803063 @:0.497313110373278 @:0.337641790153328
Thr 0.668 0.497 0.3457
RR 1.649 1.624 3.5498
SEN 0.168 0.536 0.9865
SPE 0.950 0.706 0.0792
BACC 0.559 0.621 0.5329
  @:0.16666168331968 @:0.5 @MAX_BACC @MAX_RR @SPE100 p(0.5)
Thr 0.21300 0.500 0.490 0.361 0.21300 0.500
RR 1.00000 1.596 1.738 2.276 1.00000 1.596
SEN 1.00000 0.502 0.592 0.969 1.00000 0.502
SPE 0.00452 0.729 0.683 0.106 0.00452 0.729
BACC 0.50226 0.615 0.638 0.537 0.50226 0.615
pander::pander(t(rrAnalysisCVTest$OERatio$estimate),caption="O/E Ratio")
O/E Ratio
O/E Low Upper p.value
0.8 0.728 0.878 1.21e-06
pander::pander(t(rrAnalysisCVTest$OE95ci),caption="O/E Mean")
O/E Mean
mean 50% 2.5% 97.5%
1.07 1.07 1.06 1.08
pander::pander(t(rrAnalysisCVTest$OAcum95ci),caption="O/Acum Mean")
O/Acum Mean
mean 50% 2.5% 97.5%
1.04 1.04 1.04 1.04
pander::pander(rrAnalysisCVTest$c.index$cstatCI,caption="C. Index")
mean.C Index median lower upper
0.65 0.65 0.624 0.674
pander::pander(t(rrAnalysisCVTest$ROCAnalysis$aucs),caption="ROC AUC")
ROC AUC
est lower upper
0.672 0.637 0.707
pander::pander((rrAnalysisCVTest$ROCAnalysis$sensitivity),caption="Sensitivity")
Sensitivity
est lower upper
0.168 0.135 0.206
pander::pander((rrAnalysisCVTest$ROCAnalysis$specificity),caption="Specificity")
Specificity
est lower upper
0.95 0.926 0.969
pander::pander(t(rrAnalysisCVTest$thr_atP),caption="Probability Thresholds")
Probability Thresholds (continued below)
90% at_max_BACC at_max_RR atSPE100 at_0.5 at_max_BACC at_max_RR
0.667 0.497 0.338 0.167 0.5 0.49 0.361
atSPE100 at_0.5
0.213 0.5
pander::pander(t(rrAnalysisCVTest$RR_atP),caption="Risk Ratio")
Risk Ratio
est lower upper
1.65 1.45 1.88
pander::pander(rrAnalysisCVTest$surdif,caption="Logrank test")
Logrank test Chisq = 99.011012 on 2 degrees of freedom, p = 0.000000
  N Observed Expected (O-E)^2/E (O-E)^2/V
class=0 520 208 295.5 25.9 77.3
class=1 271 163 117.9 17.3 23.6
class=2 97 75 32.7 54.8 59.6